/*	This program estimates retirement model using the Retirement Sample,
and computes predictions in the Main Analysis Sample.
It also tabulates the results of the model producing Table E1. */

***** Set directories 
local dir_raw 		"~/Dropbox/Retirement gaming/raw"
local dir_do 		"~/Dropbox/Retirement gaming/dataverse"
local dir_clean 	"~/Dropbox/Retirement gaming/clean"
local dir_output 	"~/Dropbox/Retirement gaming/output/dataverse"


** ESTIMATE ORDERED PROBIT MODEL OF RETIREMENT AGE **

* Get retirement sample data for model estimation
use  "`dir_clean'/retirmodeldata_retirsample.dta", clear

* Model variable list
local v1list = " self_empl selfnoempl first_larger first_manuf first_retail first_transport propm_refp propm_refp2 propm_served propm_served2 propm_predata  ever_overficto moverficto_noempl  "

* Additional variables for estimation model
g selfnoempl=self_empl* first_noempl
g propm_refp2 = propm_refp^2
g propm_served2 = propm_served^2
g moverficto_noempl =  moverficto * first_noempl

* Censor retirement at 67
replace agedisc_bcwstart=57 if agedisc_bcwstart>57

* Label variables
label var self_empl "Self employed"
label var selfnoempl "Self employed with no employees"
label var first_larger "Works at firm w/10+ workers"
label var first_manuf "Manufacturing"
label var first_retail "Retail/Hospitality"
label var first_transport "Transport/Communications"
label var propm_refp "Contribution density"
label var propm_refp2 "Contribution density squared"
label var propm_served "Share months served"
label var propm_served2 "Share months served squared"
label var propm_predata "Tenure at start of data"
label var ever_overficto "Reports above MCB"
label var moverficto_noempl "Share ref. period reporting above MCB"
			

	
* Estimate model and tabulate results (Table E.1)
estimates clear
eststo: oprobit agedisc_bcwstart `v1list', vce(robust)  
esttab using "`dir_output'/tableE1.tex", replace ///
	cells("b(fmt(3) star label(Coef.)) se(par label((Std. Err.)))")  ///
    stardrop(*cut*) eqlabels(none) ///
	f nomtitles nonumbers nogaps   prefoot(\hline)  label ///
	stats(N ll, fmt(0) label("Observations" "Log likelihood")) ///
	varlabels(cut1 "Threshold 60-61" cut2 "Threshold 61-62" cut3 "Threshold 62-63" cut4 "Threshold 63-64" cut5 "Threshold 64-65" cut6 "Threshold 65-66" cut7 "Threshold 66-67+")
	
	
* Predict retirement probabilities and save them
predict p0oprobit p1oprobit  p2oprobit p3oprobit p4oprobit p5oprobit p6oprobit p7oprobit , pr
keep i agedisc_bcwstart p*oprobit   
tab agedisc_bcwstart, g(p)
forvalues i=1/8 {
	local j=`i'-1
	rename p`i' p`j'obs
}
save "`dir_clean'/retirsample_predictions.dta", replace


** PREDICT RETIREMENT IN MAIN ANALYSIS SAMPLE **

* Get main analysis sample data for model prediction
drop _all
use "`dir_clean'/retirmodeldata_mainsample.dta"

* Model variable list
local v1list = " self_empl selfnoempl first_larger first_manuf first_retail first_transport propm_refp propm_refp2 propm_served propm_served2 propm_predata  ever_overficto moverficto_noempl  "

* Additional variables for model
g selfnoempl=self_empl* first_noempl
g propm_refp2 = propm_refp^2
g propm_served2 = propm_served^2
g moverficto_noempl =  moverficto * first_noempl

* Label variables
label var self_empl "Self employed"
label var selfnoempl "Self employed with no employees"
label var first_larger "Works at firm w/10+ workers"
label var first_manuf "Manufacturing"
label var first_retail "Retail/Hospitality"
label var first_transport "Transport/Communications"
label var propm_refp "Contribution density"
label var propm_refp2 "Contribution density squared"
label var propm_served "Share months served"
label var propm_served2 "Share months served squared"
label var propm_predata "Tenure at start of data"
label var ever_overficto "Reports above MCB"
label var moverficto_noempl "Share ref. period reporting above MCB"

* Predict retirement probabilities				
predict p0oprobit p1oprobit  p2oprobit p3oprobit p4oprobit p5oprobit p6oprobit p7oprobit , pr

* Compute cumulative probabilities and predicted retirement age
g expbcw= p1oprobit*1 + p2oprobit*2 + p3oprobit*3 + p4oprobit*4 + p5oprobit*5 + p6oprobit*6 + p7oprobit*7 
g expbcw_disc=round(expbcw) 
g F0oprobit=p0oprobit
g medbcw=0 if F0oprobit>=.5
forvalues a=1/7 {
	local j=`a'-1
	g F`a'oprobit=p`a'oprobit + F`j'oprobit 
	replace medbcw=`a' if F`a'oprobit>=.5 & F`j'oprobit<.5 
}
keep i p*oprobit expbcw expbcw_disc F*oprobit medbcw  

save "`dir_clean'/mainsample_predictions.dta", replace


exit
